# Estimate power plant response function using OGE data
library(pacman)
p_load(
  here, fst, data.table, fixest, ggplot2, lubridate, 
  dplyr, janitor, purrr, furrr, stringr, AER, tictoc,
  magrittr
)

# Function that fits fmls for given data
fit_all_fmls_plant = function(
  fmls_in, 
  est_dt, 
  folder_in, 
  subfolder_in, 
  max_nerc_load, 
  plant_id_eia_in
){
  # Running models for all of the formulas 
  plant_mod_coef_dt = 
    map_dfr(
      fmls_in,
      \(fml){
        # Fitting the model
        unit_mod = 
          AER::tobit(
            formula = as.formula(fml), 
            left = 0,
            right = est_dt$namepcap[1],
            data = est_dt
          )
        # Collecting results 
        return(
          data.table(
            rn = c(names(unit_mod$coefficients),'log_scale'),
            estimate = c(unit_mod$coefficients, log(unit_mod$scale))
          )[,':='(
            plant_id_eia = plant_id_eia_in,
            fml = fml, 
            loglik = unit_mod$loglik[2],
            loglik_df = unit_mod$df
          )]
        )
      }
    )
  # Saving raw coefficients 
  write.fst(
    x = plant_mod_coef_dt,
    path = here(
      "Data/electricity-generation", folder_in, subfolder_in,
      paste0("plant-mod-coefs-",plant_id_eia_in,".fst")
    )
  )
  # Determining the preferred model
  # - All coefficients must be positive 
  # - Must be increasing over range of observed loads in own region  
  # - Picking model with highest log-liklihood conditional on above
  # Casting to wide, now have one row for every model 
  coef_dt = 
    plant_mod_coef_dt |>
    dcast(
      plant_id_eia + fml + loglik + loglik_df ~ rn,
      value.var = "estimate",
      fill = 0
    ) |>
    setnames(
      old = '(Intercept)',
      new = 'intercept'
    ) %>% .[,':='(
      has_sq_term = str_detect(fml, 'sq')
    )]
  # Adding missing columns (excess load outside of interconnection)
  missing_cols = 
    data.table()[,
      c(paste0('excess_load_',c('cal','mro','npcc','rfc','serc','tre','wecc')),
        paste0('excess_load_sq_',c('cal','mro','npcc','rfc','serc','tre','wecc'))) 
        := rep(0, nrow(coef_dt))
    ] |> 
    select(-any_of(colnames(coef_dt)))
  coef_dt = cbind(coef_dt,missing_cols)
  # Adding nerc to coef table 
  coef_dt[,nerc_adj := est_dt$nerc_adj[1]]
  # Checking that derivative wrt own region excess load is
  # positive at at max excess load, and all coefs are positive
  coef_dt[,':='(
    increasing_at_max = 
      excess_load_cal  + 2*excess_load_sq_cal *max_nerc_load['CAL']  >= 0 &
      excess_load_mro  + 2*excess_load_sq_mro *max_nerc_load['MRO']  >= 0 &
      excess_load_npcc + 2*excess_load_sq_npcc*max_nerc_load['NPCC'] >= 0 &
      excess_load_rfc  + 2*excess_load_sq_rfc *max_nerc_load['RFC']  >= 0 &
      excess_load_serc + 2*excess_load_sq_serc*max_nerc_load['SERC'] >= 0 &
      excess_load_tre  + 2*excess_load_sq_tre *max_nerc_load['TRE']  >= 0 &
      excess_load_wecc + 2*excess_load_sq_wecc*max_nerc_load['WECC'] >= 0,
    coefs_positive = 
      excess_load_cal  >= 0 & 
      excess_load_mro  >= 0 & 
      excess_load_npcc >= 0 & 
      excess_load_rfc  >= 0 & 
      excess_load_serc >= 0 & 
      excess_load_tre  >= 0 & 
      excess_load_wecc >= 0 
  )][,# Creating indicator for models that fit our restrictions 
     good_model := coefs_positive & increasing_at_max
  ]
  # Adding the estimated extremum 
  coef_dt[, ':='(
    est_extremum_cal = ifelse(
      excess_load_sq_cal != 0,
      -excess_load_cal/(2*excess_load_sq_cal),
      NA),
    est_extremum_mro  = ifelse(
      excess_load_sq_mro != 0,
      -excess_load_mro/(2*excess_load_sq_mro),
      NA),
    est_extremum_npcc = ifelse(
      excess_load_sq_npcc != 0,
      -excess_load_npcc/(2*excess_load_sq_npcc),
      NA),
    est_extremum_rfc  = ifelse(
      excess_load_sq_rfc != 0,
      -excess_load_rfc/(2*excess_load_sq_rfc),
      NA),
    est_extremum_serc = ifelse(
      excess_load_sq_serc != 0,
      -excess_load_serc/(2*excess_load_sq_serc),
      NA),
    est_extremum_tre  = ifelse(
      excess_load_sq_tre != 0,
      -excess_load_tre/(2*excess_load_sq_tre),
      NA),
    est_extremum_wecc = ifelse(
      excess_load_sq_wecc != 0,
      -excess_load_wecc/(2*excess_load_sq_wecc),
      NA)
  )]
  return(coef_dt)
}

# Function to fit and clean all models for single unit 
fit_plant = function(
  plant_id_eia_in, 
  oge_elec_gen_dt, 
  fml_dt, 
  max_nerc_load, 
  folder_in = 'plant-model-fit'
){
  print(plant_id_eia_in)
  # Reading data for a single unit
  plant_gen_dt = oge_elec_gen_dt[.(plant_id_eia_in)]
  # Running models for all of the formulas 
  all_model_dt = fit_all_fmls_plant(
    fmls_in = fml_dt[nerc_adj == plant_gen_dt[1]$nerc_adj]$ic_formula,
    est_dt = plant_gen_dt, 
    folder_in = folder_in, 
    subfolder_in = 'coefs', 
    max_nerc_load = max_nerc_load,
    plant_id_eia_in = plant_id_eia_in
  )
  # Running models for transmission constraints: LOW 
  all_model_low_dt = fit_all_fmls_plant(
    fmls_in = fml_dt[nerc_adj == plant_gen_dt[1]$nerc_adj]$ic_formula,
    est_dt = plant_gen_dt[above_transmission_constraint == FALSE],
    folder_in = folder_in, 
    subfolder_in = 'coefs-low', 
    max_nerc_load = max_nerc_load,
    plant_id_eia_in = plant_id_eia_in
  )
  # Running models for transmission constraints: HIGH (own region only)
  all_model_high_dt = fit_all_fmls_plant(
    fmls_in = fml_dt[
        nerc_adj == plant_gen_dt[1]$nerc_adj & 
        own_region_only == TRUE
      ]$ic_formula,
    est_dt = plant_gen_dt[above_transmission_constraint == TRUE],
    folder_in = folder_in, 
    subfolder_in = 'coefs-high', 
    max_nerc_load = max_nerc_load,
    plant_id_eia_in = plant_id_eia_in
  )
  # Choosing the best (highest log liklihood) model among the good ones
  # (Excludes units that have **NO** good models)
  good_model_dt = all_model_dt[
    good_model == TRUE, 
    .SD[which.max(loglik)], 
    by = .(plant_id_eia)
  ]
  # Saving the results
  write.fst(
    x = good_model_dt,
    path = here(paste0(
      "Data/electricity-generation/",folder_in,"/good-model-dt/good-model-dt-",
      plant_id_eia_in,".fst"
    ))
  )
  # Repeating for a model with just own region load 
  own_region_model_dt = merge(
    all_model_dt,
    fml_dt[,.(ic_formula, own_region_only)] |> unique(),
    by.x = 'fml', by.y = 'ic_formula',
  )[own_region_only == TRUE & good_model == TRUE,
    .SD[which.max(loglik)], 
    by = .(plant_id_eia)
  ]
  # Saving the results
  write.fst(
    x = own_region_model_dt,
    path = here(paste0(
      "Data/electricity-generation/",folder_in,"/own-region-model-dt/own-region-model-dt-",
      plant_id_eia_in,".fst"
    ))
  )
  # Doing same for low/high transmission constraint
  good_model_low_dt = all_model_low_dt[
    good_model == TRUE, 
    .SD[which.max(loglik)], 
    by = .(plant_id_eia)
  ]
  write.fst(
    x = good_model_low_dt,
    path = here(
      "Data/electricity-generation", folder_in,'transmission-constraint-low-model-dt',
      paste0(
        "transmission-constraint-low-model-dt-",
        plant_id_eia_in,".fst"
    ))
  )
  # Now high transmission constraints 
  good_model_high_dt = all_model_high_dt[
    good_model == TRUE, 
    .SD[which.max(loglik)], 
    by = .(plant_id_eia)
  ]
  write.fst(
    x = good_model_high_dt,
    path = here(
      "Data/electricity-generation", folder_in,'transmission-constraint-high-model-dt',
      paste0(
        "transmission-constraint-high-model-dt-",
        plant_id_eia_in,".fst"
    ))
  )
  # Calculating raw fitted values
  plant_gen_dt[,':='(
    fit_net_generation_mwh_raw = good_model_dt$intercept + 
      good_model_dt$excess_load_cal*excess_load_cal +
      good_model_dt$excess_load_mro*excess_load_mro +
      good_model_dt$excess_load_npcc*excess_load_npcc +
      good_model_dt$excess_load_rfc*excess_load_rfc +
      good_model_dt$excess_load_serc*excess_load_serc +
      good_model_dt$excess_load_tre*excess_load_tre +
      good_model_dt$excess_load_wecc*excess_load_wecc +
      good_model_dt$excess_load_sq_cal *excess_load_sq_cal +
      good_model_dt$excess_load_sq_mro *excess_load_sq_mro +
      good_model_dt$excess_load_sq_npcc*excess_load_sq_npcc +
      good_model_dt$excess_load_sq_rfc *excess_load_sq_rfc +
      good_model_dt$excess_load_sq_serc*excess_load_sq_serc +
      good_model_dt$excess_load_sq_tre *excess_load_sq_tre +
      good_model_dt$excess_load_sq_wecc*excess_load_sq_wecc,
    fit_net_generation_mwh_raw_own_region = own_region_model_dt$intercept + 
      own_region_model_dt$excess_load_cal*excess_load_cal +
      own_region_model_dt$excess_load_mro*excess_load_mro +
      own_region_model_dt$excess_load_npcc*excess_load_npcc +
      own_region_model_dt$excess_load_rfc*excess_load_rfc +
      own_region_model_dt$excess_load_serc*excess_load_serc +
      own_region_model_dt$excess_load_tre*excess_load_tre +
      own_region_model_dt$excess_load_wecc*excess_load_wecc +
      own_region_model_dt$excess_load_sq_cal *excess_load_sq_cal +
      own_region_model_dt$excess_load_sq_mro *excess_load_sq_mro +
      own_region_model_dt$excess_load_sq_npcc*excess_load_sq_npcc +
      own_region_model_dt$excess_load_sq_rfc *excess_load_sq_rfc +
      own_region_model_dt$excess_load_sq_serc*excess_load_sq_serc +
      own_region_model_dt$excess_load_sq_tre *excess_load_sq_tre +
      own_region_model_dt$excess_load_sq_wecc*excess_load_sq_wecc
  )]
  # Drawing epsilons 
  N = nrow(plant_gen_dt)
  plant_gen_dt[,':='(
    epsilon = rnorm(N, mean = 0, sd = exp(good_model_dt$log_scale)),
    epsilon_own_region = rnorm(N, mean = 0, sd = exp(own_region_model_dt$log_scale))
  )]
  # Censoring the fitted values
  plant_gen_dt[,':='(
    fit_net_generation_mwh = fcase(
      fit_net_generation_mwh_raw + epsilon >= 0 & 
        fit_net_generation_mwh_raw + epsilon <= namepcap, 
      fit_net_generation_mwh_raw + epsilon,
      fit_net_generation_mwh_raw + epsilon < 0, 0,
      fit_net_generation_mwh_raw + epsilon > namepcap, namepcap
    ),
    fit_net_generation_mwh_own_region = fcase(
      fit_net_generation_mwh_raw_own_region + epsilon_own_region >= 0 
      & fit_net_generation_mwh_raw_own_region + epsilon_own_region <= namepcap, 
      fit_net_generation_mwh_raw_own_region + epsilon_own_region,
      fit_net_generation_mwh_raw_own_region + epsilon_own_region < 0, 0,
      fit_net_generation_mwh_raw_own_region + epsilon_own_region > namepcap, namepcap
    )
  )]
  # Saving the results
  write.fst(
    x = plant_gen_dt[,.(
      plant_id_eia, datetime_utc, 
      net_generation_mwh, 
      fit_net_generation_mwh,
      co2e_mass_lb_for_electricity, 
      nox_mass_lb_for_electricity, 
      so2_mass_lb_for_electricity, 
      fuel_consumed_for_electricity_mmbtu,
      fit_net_generation_mwh_raw, 
      fit_net_generation_mwh_own_region, 
      fit_net_generation_mwh_raw_own_region, 
      epsilon,
      epsilon_own_region,
      net_generation_mwh_raw 
    )],
    path = here(paste0(
      "Data/electricity-generation/",folder_in,"/plant-gen-dt/plant-gen-dt-",
      plant_id_eia_in,".fst"
    ))
  )
}


fit_models_nerc = function(
  nerc_adj_in,
  fml_dt,
  max_nerc_load,
  time_bounds =  c(ymd_hms('2019-01-01 10:00:00'), ymd_hms('2019-12-31 23:00:00')), 
  elec_gen_path_in = 'elec-gen-dt/elec-gen-dt', 
  folder_in = 'plant-model-fit'
){
  # Loading the electricity generation data
  oge_elec_gen_dt =
    read.fst(
      path = here(paste0(
        "Data/electricity-generation/",
        elec_gen_path_in,'-',
        tolower(nerc_adj_in),
        ".fst"
      )),
      as.data.table = TRUE
    )[# Filtering to time bounds and excluding wind/solar
      datetime_utc %between% c(time_bounds[1],time_bounds[2]) & 
      !(fuel_category %in% c('wind','solar'))
    ] |> 
    setkey(plant_id_eia, datetime_utc) 
  # Getting list of units not yet run 
  plant_id_eia_dt = 
    oge_elec_gen_dt[,.(
      tot_net_generation_mwh = sum(net_generation_mwh),
      var_net_generation_mwh = var(net_generation_mwh)), 
      keyby= .(plant_id_eia, fuel_category, above_transmission_constraint)
    ] |>
    dcast(
      formula = plant_id_eia +fuel_category ~ above_transmission_constraint, 
      value.var = c('tot_net_generation_mwh','var_net_generation_mwh'), 
      fill = 0
    )
  plant_id_eia_already_run = 
    list.files(here("Data/electricity-generation",folder_in,"coefs-high")) |> 
    str_remove("plant-mod-coefs-") |>
    str_remove("\\.fst") |>
    as.integer()
  plant_id_eia_to_run = plant_id_eia_dt[
    !(plant_id_eia %in% plant_id_eia_already_run) & 
    tot_net_generation_mwh_TRUE > 0 &
    tot_net_generation_mwh_FALSE > 0 &
    var_net_generation_mwh_TRUE > 0 &
    var_net_generation_mwh_FALSE > 0 
  ]$plant_id_eia  
  # Runnning for all units 
  future_map(
    plant_id_eia_to_run,
    fit_plant,
    folder_in = folder_in,
    oge_elec_gen_dt = oge_elec_gen_dt,
    fml_dt = fml_dt, 
    max_nerc_load = max_nerc_load, 
    .options = furrr_options(seed = TRUE)
  )
}



# Function to run nerc-level function for all nerc regions
fit_models_all_units = function(
  nerc_adj_in = c('CAL','MRO','NPCC','RFC','SERC','TRE','WECC'),
  time_bounds =  c(ymd_hms('2019-01-01 10:00:00'), ymd_hms('2019-12-31 23:00:00')),
  elec_gen_path_in = 'elec-gen-dt/elec-gen-dt', 
  folder_in = 'plant-model-fit',
  delete = FALSE
){
  # Formula changes depending on where the plant is located 
  # Table with all of the formulas 
  fml_dt_raw = 
    data.table(
      nerc_adj = c(
        'TRE',
        rep('WECC',2),
        rep('CAL', 2),
        rep('MRO', 8),
        rep('NPCC',8),
        rep('RFC', 8),
        rep('SERC',8)
      ),
      control_1 = c(
        NA, 
        NA, 'cal', 
        NA, 'wecc', 
        NA, 'npcc','npcc','npcc','rfc', 'npcc','rfc', 'serc', 
        NA, 'mro', 'mro', 'mro', 'rfc', 'mro', 'rfc', 'serc', 
        NA, 'mro', 'mro', 'mro', 'npcc','mro', 'npcc','serc', 
        NA, 'mro', 'mro', 'mro', 'npcc','mro', 'npcc','rfc'
      ),
      control_2 = c(
        NA, 
        NA, NA, 
        NA, NA, 
        NA, 'rfc', 'rfc', 'serc','serc', NA, NA, NA, 
        NA, 'rfc', 'rfc', 'serc','serc', NA, NA, NA, 
        NA, 'npcc','npcc','serc','serc', NA, NA, NA, 
        NA, 'npcc','npcc','rfc', 'rfc',  NA, NA, NA
      ),
      control_3 = c(
        NA, 
        NA, NA, 
        NA, NA, 
        NA, 'serc', NA, NA, NA, NA, NA, NA,
        NA, 'serc', NA, NA, NA, NA, NA, NA, 
        NA, 'serc', NA, NA, NA, NA, NA, NA, 
        NA, 'rfc',  NA, NA, NA, NA, NA, NA
      )
    )
  fml_dt =
    rbind(
      fml_dt_raw[,.(nerc_adj, control_1, control_2, control_3, terms = 'quadratic')],
      fml_dt_raw[,.(nerc_adj, control_1, control_2, control_3, terms = 'linear')],
      fml_dt_raw[
        !(is.na(control_1) & is.na(control_2) & is.na(control_3)),
        .(nerc_adj, control_1, control_2, control_3, terms = 'quadratic other')
      ],
      fml_dt_raw[
        is.na(control_1) & is.na(control_2) & is.na(control_3),
        .(nerc_adj, control_1, control_2, control_3, terms = 'constant')
      ]
    )[,
      ic_formula := 
        fcase(
          terms == 'linear', 
          paste(
            paste0('net_generation_mwh ~ excess_load_',tolower(nerc_adj)),
            paste0('excess_load_',control_1),
            paste0('excess_load_',control_2),
            paste0('excess_load_',control_3),
            sep = ' + '
          ),
          terms == 'quadratic', 
          paste(
            paste0('net_generation_mwh ~ excess_load_',tolower(nerc_adj)),
            paste0('excess_load_sq_',tolower(nerc_adj)),
            paste0('excess_load_',control_1),
            paste0('excess_load_',control_2),
            paste0('excess_load_',control_3),
            sep = ' + '
          ),
          terms == 'quadratic other', 
          paste(
            paste0('net_generation_mwh ~ excess_load_',tolower(nerc_adj)),
            paste0('excess_load_sq_',tolower(nerc_adj)),
            paste0('excess_load_',control_1),
            paste0('excess_load_',control_2),
            paste0('excess_load_',control_3),
            paste0('excess_load_sq_',control_1),
            paste0('excess_load_sq_',control_2),
            paste0('excess_load_sq_',control_3),
            sep = ' + '
          ),
          terms == 'constant', 'net_generation_mwh ~ 1'
        ) |> 
        str_remove_all(' \\+ excess_load_NA') |> 
        str_remove_all(' \\+ excess_load_sq_NA')
    ][,
      own_region_only := is.na(control_1) & is.na(control_2) & is.na(control_3)
    ]   
  # Loading nerc data to see max observed values 
  max_nerc_load_dt = 
    read.fst(
      path = here("Data/electricity-generation/clean-load/nerc-load-dt-oge.fst"),
      as.data.table = TRUE
    )[,.(
      #max_eload_loss_adj = max(excess_load_loss_adj),
      max_eload = max(excess_load)
      ), 
      keyby = nerc_adj
    ]
  if(str_detect(elec_gen_path_in, 'loss')){
    max_nerc_load = max_nerc_load_dt$max_eload_loss_adj
  }else{
    max_nerc_load = max_nerc_load_dt$max_eload
  }
  names(max_nerc_load) = max_nerc_load_dt$nerc_adj
  # Deleting old files 
  if(delete == TRUE){
    fp = paste0('Data/electricity-generation/',folder_in)
    unlink(here(fp,"coefs"), recursive = TRUE)
    unlink(here(fp,"coefs-low"), recursive = TRUE)
    unlink(here(fp,"coefs-high"), recursive = TRUE)
    unlink(here(fp,"plant-gen-dt"), recursive = TRUE)
    unlink(here(fp,"good-model-dt"), recursive = TRUE)
    unlink(here(fp,"own-region-model-dt"), recursive = TRUE)
    dir.create(here(fp,"coefs"))
    dir.create(here(fp,"coefs-low"))
    dir.create(here(fp,"coefs-high"))
    dir.create(here(fp,"plant-gen-dt"))
    dir.create(here(fp,"good-model-dt"))
    dir.create(here(fp,"own-region-model-dt"))
    dir.create(here(fp,"transmission-constraint-low-model-dt"))
    dir.create(here(fp,"transmission-constraint-high-model-dt"))
  }
  # Fitting models!
  set.seed(0218)
  map(
    nerc_adj_in,
    fit_models_nerc,
    time_bounds =  time_bounds, 
    fml_dt = fml_dt,
    max_nerc_load = max_nerc_load,
    elec_gen_path_in = elec_gen_path_in, 
    folder_in = folder_in
  )
}

clean_model_results = function(
  elec_gen_path_in = 'elec-gen-dt-oge', 
  folder_in = 'plant-model-fit',
  time_bounds =  c(ymd_hms('2019-01-01 10:00:00'), ymd_hms('2019-12-31 23:00:00'))
){
  # Combining all of the model files 
  plant_mod_coef_dt = 
    map_dfr(
      list.files(
        here("Data/electricity-generation",folder_in,"coefs"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    plant_mod_coef_dt,
    path = here("Data/electricity-generation",folder_in,"plant-mod-coef-dt.fst")
  )
  plant_mod_coef_low_dt = 
    map_dfr(
      list.files(
        here("Data/electricity-generation",folder_in,"coefs-low"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    plant_mod_coef_low_dt,
    path = here(
      'Data/electricity-generation',folder_in,'plant-mod-coef-low-dt.fst'
    )
  )
  plant_mod_coef_high_dt = 
    map_dfr(
      list.files(
        here("Data/electricity-generation",folder_in,"coefs-high"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    plant_mod_coef_high_dt,
    path = here(
      'Data/electricity-generation',folder_in,'plant-mod-coef-high-dt.fst'
    )
  )
  # Combining all of the good model files 
  good_model_dt = 
    map_dfr(
      list.files(
        here("Data/electricity-generation",folder_in,"good-model-dt"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    good_model_dt,
    path = here("Data/electricity-generation",folder_in,"good-model-dt.fst")
  )
  transmission_low_dt = 
    map_dfr(
      list.files(
        here(
          "Data/electricity-generation",folder_in,
          "transmission-constraint-low-model-dt"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    transmission_low_dt,
    path = here(
      "Data/electricity-generation", folder_in,
      "transmission-constraint-low-model-dt.fst"
    )
  )
  transmission_high_dt = 
    map_dfr(
      list.files(
        here(
          "Data/electricity-generation",folder_in,
          "transmission-constraint-high-model-dt"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    transmission_high_dt,
    path = here(
      "Data/electricity-generation", folder_in,
      "transmission-constraint-high-model-dt.fst"
    )
  )
  # Output for Mark to use 
  good_model_dt = 
    read.fst(
      path = here("Data/electricity-generation",folder_in,"good-model-dt.fst"),
      as.data.table = TRUE
    )[,.(
      orispl_code = plant_id_eia, 
      intercept,
      excess_load_cal,
      excess_load_cal_sq = excess_load_sq_cal,
      excess_load_mro,
      excess_load_mro_sq = excess_load_sq_mro,
      excess_load_npcc,
      excess_load_npcc_sq = excess_load_sq_npcc,
      excess_load_rfc,
      excess_load_rfc_sq = excess_load_sq_rfc,
      excess_load_serc,
      excess_load_serc_sq = excess_load_sq_serc,
      excess_load_tre,
      excess_load_tre_sq = excess_load_sq_tre,
      excess_load_wecc,
      excess_load_wecc_sq = excess_load_sq_wecc,
      log_scale, 
      est_extremum_cal,
      est_extremum_mro,
      est_extremum_npcc,
      est_extremum_rfc,
      est_extremum_serc,
      est_extremum_tre,
      est_extremum_wecc,
      loglik, loglik_df
    )] |>
    setkey(orispl_code) 
  # Writing the results as a csv
  fwrite(
    x = good_model_dt,
    file = here(
      "Data/electricity-generation/output",
      paste0("PPRegressors",str_remove(elec_gen_path_in, 'elec-gen-dt'),".csv")
    )
  )
  # Combining all of the own region model files 
  own_region_model_dt = 
    map_dfr(
      list.files(
        here("Data/electricity-generation",folder_in,"own-region-model-dt"), 
        full.names = TRUE
      ),
      read.fst, 
      as.data.table = TRUE
    )
  write.fst(
    own_region_model_dt,
    path = here("Data/electricity-generation",folder_in,"own-region-model-dt.fst")
  )
  # Now loading coefficients for own region only 
  own_region_model_dt = 
    read.fst(
      path = here("Data/electricity-generation",folder_in,"own-region-model-dt.fst"),
      as.data.table = TRUE
    )[,.(
      orispl_code = plant_id_eia, 
      intercept,
      excess_load_cal,
      excess_load_cal_sq = excess_load_sq_cal,
      excess_load_mro,
      excess_load_mro_sq = excess_load_sq_mro,
      excess_load_npcc,
      excess_load_npcc_sq = excess_load_sq_npcc,
      excess_load_rfc,
      excess_load_rfc_sq = excess_load_sq_rfc,
      excess_load_serc,
      excess_load_serc_sq = excess_load_sq_serc,
      excess_load_tre,
      excess_load_tre_sq = excess_load_sq_tre,
      excess_load_wecc,
      excess_load_wecc_sq = excess_load_sq_wecc,
      log_scale, 
      est_extremum_cal,
      est_extremum_mro,
      est_extremum_npcc,
      est_extremum_rfc,
      est_extremum_serc,
      est_extremum_tre,
      est_extremum_wecc,
      loglik, loglik_df
    )] |>
    setkey(orispl_code) 
  # Writing the results as a csv for mark 
  fwrite(
    x = own_region_model_dt,
    file = here(
      "Data/electricity-generation/output",
      paste0(
        "PPRegressors-own-region",
        str_remove(elec_gen_path_in, 'elec-gen-dt'),
        ".csv"
      )
    )
  )
  # Now loading coefficients for transmission constraints 
  transmission_high_dt = 
    transmission_high_dt[,.(
      orispl_code = plant_id_eia, 
      intercept,
      excess_load_cal,
      excess_load_cal_sq = excess_load_sq_cal,
      excess_load_mro,
      excess_load_mro_sq = excess_load_sq_mro,
      excess_load_npcc,
      excess_load_npcc_sq = excess_load_sq_npcc,
      excess_load_rfc,
      excess_load_rfc_sq = excess_load_sq_rfc,
      excess_load_serc,
      excess_load_serc_sq = excess_load_sq_serc,
      excess_load_tre,
      excess_load_tre_sq = excess_load_sq_tre,
      excess_load_wecc,
      excess_load_wecc_sq = excess_load_sq_wecc,
      log_scale, 
      est_extremum_cal,
      est_extremum_mro,
      est_extremum_npcc,
      est_extremum_rfc,
      est_extremum_serc,
      est_extremum_tre,
      est_extremum_wecc,
      loglik, loglik_df
    )] |>
    setkey(orispl_code) 
  # Writing the results as a csv for mark 
  fwrite(
    x = transmission_high_dt,
    file = here(
      "Data/electricity-generation/output",
      paste0(
        "PPRegressors-transmission-high",
        str_remove(elec_gen_path_in, 'elec-gen-dt'),
        ".csv"
      )
    )
  )
  transmission_low_dt = 
    transmission_low_dt[,.(
      orispl_code = plant_id_eia, 
      intercept,
      excess_load_cal,
      excess_load_cal_sq = excess_load_sq_cal,
      excess_load_mro,
      excess_load_mro_sq = excess_load_sq_mro,
      excess_load_npcc,
      excess_load_npcc_sq = excess_load_sq_npcc,
      excess_load_rfc,
      excess_load_rfc_sq = excess_load_sq_rfc,
      excess_load_serc,
      excess_load_serc_sq = excess_load_sq_serc,
      excess_load_tre,
      excess_load_tre_sq = excess_load_sq_tre,
      excess_load_wecc,
      excess_load_wecc_sq = excess_load_sq_wecc,
      log_scale, 
      est_extremum_cal,
      est_extremum_mro,
      est_extremum_npcc,
      est_extremum_rfc,
      est_extremum_serc,
      est_extremum_tre,
      est_extremum_wecc,
      loglik, loglik_df
    )] |>
    setkey(orispl_code) 
  # Writing the results as a csv for mark 
  fwrite(
    x = transmission_low_dt,
    file = here(
      "Data/electricity-generation/output",
      paste0(
        "PPRegressors-transmission-low",
        str_remove(elec_gen_path_in, 'elec-gen-dt'),
        ".csv"
      )
    )
  )
  # Writing plant info table for Mark to use 
  mark_plant_info_dt =
    read.fst(
      path = here("Data/electricity-generation/plant-info-dt-oge.fst"),
      as.data.table = TRUE
    )[year %in% unique(year(time_bounds)),
      .SD[which.min(year)], 
      by = .(plant_id_eia)
    ][plant_id_eia %in% good_model_dt$orispl_code] |>
    setkey(plant_id_eia)
  # Nameplate capacities
  fwrite(
    x = mark_plant_info_dt[,.(orispl_code = plant_id_eia, year, namepcap)],
    file = here("Data/electricity-generation/output/PPCap-oge.csv")
  )
  # Stack heights
  fwrite(
    x = mark_plant_info_dt[,.(orispl_code = plant_id_eia, stack_height)],
    file = here("Data/electricity-generation/output/StackHeight-oge.csv")
  )
  # Cleaning up
  rm(
    good_model_dt,
    own_region_model_dt,
    plant_mod_coef_dt, 
    plant_mod_coef_high_dt, 
    plant_mod_coef_low_dt, 
    transmission_high_dt, 
    transmission_low_dt
  )
  invisible(gc())
  # Hourly emissions data
  #elec_gen_fit_dt =
  #  map_dfr(
  #    list.files(
  #      here("Data/electricity-generation",folder_in,"plant-gen-dt"), 
  #      full.names = TRUE
  #    ),
  #    read.fst
  #  )|> 
  #  data.table() |>
  #  setkey(plant_id_eia, datetime_utc) 
  #write.fst(
  #  elec_gen_fit_dt,
  #  path = here("Data/electricity-generation",folder_in,"elec-gen-fit-dt.fst")
  #)
  # Load table 
  nerc_load_dt_raw = 
    read.fst(
      path = here("Data/electricity-generation/clean-load/nerc-load-dt-oge.fst"),
      as.data.table = TRUE
    )[datetime_utc %between% c(time_bounds[1], time_bounds[2])] |>
    setkey(nerc_adj, datetime_utc)
  # Reading the electricity generation files (with fitted values)
  # and summarizing production by region
  nerc_gen_dt = 
    map_dfr(
      c('CAL','MRO','NPCC','RFC','SERC','TRE','WECC'),
      \(nerc_adj_in){
        merge(
          mark_plant_info_dt[,.(plant_id_eia)],
          read.fst(
            path = here(
              'Data/electricity-generation/elec-gen-dt',
              paste0('elec-gen-dt-',tolower(nerc_adj_in),'.fst')
            ),
            as.data.table = TRUE
          ),
          by = c('plant_id_eia')
        )[datetime_utc %between% c(time_bounds[1], time_bounds[2]),
          .(tot_net_generation_mwh = sum(net_generation_mwh)),
          keyby = .(nerc_adj, datetime_utc)
        ]
      }
    )
  setkey(nerc_gen_dt, nerc_adj, datetime_utc)
  # Adding gen data to load data
  nerc_load_dt = 
    join(
      nerc_gen_dt,
      nerc_load_dt_raw, 
      on = c("nerc_adj","datetime_utc"), 
      how = 'inner'
    )
  # Renewables 
  fwrite(
    x = nerc_load_dt[,.(
      nerc_adj, 
      utc_time = datetime_utc, 
      load_nd, 
      load_solar, 
      load_wind = load_nd - load_solar,
      load_hydro
    )],
    file = here("Data/electricity-generation/output/RegRenew-oge.csv")
  )
  # Renewables: 
  solar_growth = 3.5
  wind_growth = 0.5
  fwrite(
    x = nerc_load_dt[,.(
      nerc_adj, utc_time = datetime_utc, 
      load_nd = load_solar*(1+solar_growth) + (load_nd - load_solar)*(1 + wind_growth), 
      load_solar = load_solar*(1+solar_growth), 
      load_wind = (load_nd - load_solar)*(1 + wind_growth),
      load_hydro
    )] ,
    file = here("Data/electricity-generation/output/RegRenew-oge-grow-renewables.csv")
  )
  # Renewables -loss adj
  # fwrite(
  #   x = nerc_load_dt[,.(
  #     nerc_adj, utc_time = datetime_utc, 
  #     load_nd = load_nd_loss_adj, 
  #     load_solar = load_solar_loss_adj, 
  #     load_wind_loss_adj = load_nd_loss_adj - load_solar_loss_adj,
  #     load_hydro = load_hydro_loss_adj
  #   )],
  #   file = here("Data/electricity-generation/output/RegRenew-oge-line-losses.csv")
  # )
  # Total Load
  fwrite(
    x = nerc_load_dt[,.(
      nerc_adj, 
      utc_time = datetime_utc, 
      excess_load, 
      excess_load_adj = excess_load - load_ff + tot_net_generation_mwh, 
      above_transmission_constraint = excess_load_high
    )],
    file = here("Data/electricity-generation/output/RegLoad-oge.csv")
  )
  # Total Load
  # fwrite(
  #   x = nerc_load_dt[,.(
  #     nerc_adj, utc_time = datetime_utc, 
  #     excess_load = excess_load_loss_adj, 
  #     excess_load_adj = excess_load_loss_adj - load_ff_loss_adj + tot_net_generation_mwh
  #   )],
  #   file = here("Data/electricity-generation/output/RegLoad-oge-line-losses.csv")
  # )
}


count_plants = function(){
  # Loading the electricity generation data
  plant_count_dt =
    map_dfr(
      c('CAL','MRO','NPCC','RFC','SERC','TRE','WECC'),
      \(x) {
        read.fst(
          path = here(paste0(
            "Data/electricity-generation/",
            'elec-gen-dt/elec-gen-dt-',
            tolower(x), ".fst"
          )),
          as.data.table = TRUE
        )[datetime_utc %between% c(
            ymd_hms('2019-01-01 10:00:00'), 
            ymd_hms('2019-12-31 23:00:00')
          ),
          .(
            obs = .N,
            tot_net_generation_mwh = sum(net_generation_mwh),
            var_net_generation_mwh = var(net_generation_mwh)), 
          keyby= .(plant_id_eia, fuel_category, above_transmission_constraint)
        ] |>
        dcast(
          formula = plant_id_eia +fuel_category ~ above_transmission_constraint, 
          value.var = c('tot_net_generation_mwh','var_net_generation_mwh','obs'), 
          fill = 0
        )
      }
    )
  # Counts of units 
  plant_count_dt[,
    .(count = .N, n_obs = sum(obs_TRUE + obs_FALSE)), 
    keyby = .(category = fcase(
      fuel_category %in% c('wind','solar'), 
        '01: nondispatchable',
      tot_net_generation_mwh_FALSE <= 0 | tot_net_generation_mwh_TRUE <= 0,       
        '02: negative or zero production', 
      var_net_generation_mwh_FALSE <= 0 | var_net_generation_mwh_TRUE <= 0, 
        '03: no production variance', 
        default = '04: In model'
    ))
  ] |>
  rbind(plant_count_dt[,.(count = .N, n_obs = sum(obs_TRUE + obs_FALSE)),keyby = .(category = ifelse(!is.na(plant_id_eia), '05: Total', NA))])
}
